{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split \n",
    "from sklearn.metrics import log_loss\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "import seaborn as sns\n",
    "from catboost import CatBoostClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the three Excel workbooks into `train`, `test` and `submission`.\n",
    "train, test, submission = [\n",
    "    pd.read_excel(workbook)\n",
    "    for workbook in (\n",
    "        \"data/Data_Train.xlsx\",\n",
    "        \"data/Data_Test.xlsx\",\n",
    "        \"data/Sample_Submission.xlsx\",\n",
    "    )\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "               ID        Date  LicenseNo  FacilityID  FacilityName  \\\n",
      "0  31103489027986  26-04-2010       4744        8123          7715   \n",
      "1  10088999935915  21-06-2009       2973       12268         11664   \n",
      "2  40148966010272  01-05-2013      18223        1112           969   \n",
      "3  37157708563676  28-09-2015      20825       20007         19115   \n",
      "4  47478049564374  09-12-2015       2136       16867         10409   \n",
      "\n",
      "            Type  Street         City       State  LocationID  \\\n",
      "0     RESTAURANT   15522  id-11235901  id_1890134     81876.0   \n",
      "1  GROCERY STORE    3057  id-11235901  id_1890134     81862.0   \n",
      "2     RESTAURANT   14988  id-11235901  id_1890134     81883.0   \n",
      "3     RESTAURANT    3661  id-11235901  id_1890134     81859.0   \n",
      "4     RESTAURANT    7876  id-11235901  id_1890134     81886.0   \n",
      "\n",
      "                  Reason  SectionViolations RiskLevel     Geo_Loc  \\\n",
      "0                CANVASS               33.0      High  locid16406   \n",
      "1              COMPLAINT               33.0      High    locid878   \n",
      "2                CANVASS                NaN      High   locid3368   \n",
      "3  CANVASS RE-INSPECTION               31.0    Medium  locid11839   \n",
      "4              COMPLAINT               30.0      High  locid12264   \n",
      "\n",
      "   Inspection_Results  \n",
      "0                   4  \n",
      "1                   4  \n",
      "2                   6  \n",
      "3                   4  \n",
      "4                   4  \n"
     ]
    }
   ],
   "source": [
    "# Preview the first rows of train. A bare last expression is rendered with the\n",
    "# notebook's rich table display instead of the wrapped plain text print() produces.\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the '-'-separated Date string into Day, Month and Year columns.\n",
    "# The pieces are kept as strings; no numeric conversion happens here.\n",
    "date_parts = ['Day', 'Month', 'Year']\n",
    "for frame in (train, test):\n",
    "    frame[date_parts] = frame['Date'].str.split(\"-\", expand=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x2642e4f5548>"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEHCAYAAABvHnsJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAb6UlEQVR4nO3df7RdZX3n8ffHROSHxCRwYTAJDcWMikwNcAtR1iglNgRrTaZDOmRVE2naWAYcf83YoF2mBbMWjrZU/EEnJZHEojFGkdQGYlYAtYyB3CDyK9LcBkquQXI1AUFGaOhn/jjPLcebk+TcnZxzuMnntdZZZ5/vfvY+z/ZHPnfv/ZxnyzYRERFVvKzTHYiIiOErIRIREZUlRCIiorKESEREVJYQiYiIykZ2ugPtdvzxx3vixImd7kZExLCxadOmn9ruarTusAuRiRMn0tPT0+luREQMG5L+ZW/rcjkrIiIqS4hERERlCZGIiKgsIRIREZW1NEQkfVDSg5IekPQVSUdKOkXSXZK2SPqqpCNK21eUz71l/cS6/VxR6g9LuqCuPr3UeiUtaOWxRETEnloWIpLGAf8D6LZ9OjACuBj4JHCN7UnALmBe2WQesMv2a4BrSjsknVa2ewMwHfiCpBGSRgCfBy4ETgNml7YREdEmrb6cNRI4StJI4GjgceB8YFVZvwyYWZZnlM+U9VMlqdRX2H7O9iNAL3B2efXa3mr7eWBFaRsREW3SshCx/WPg08Bj1MLjKWAT8KTt3aVZHzCuLI8DtpVtd5f2x9XXB22zt/oeJM2X1COpp7+//8APLiIigNZezhpD7czgFODVwDHULj0NNvBAE+1l3VDrexbtxba7bXd3dTX80WVERFTQyl+svw14xHY/gKRvAG8GRksaWc42xgPbS/s+YALQVy5/vQrYWVcfUL/N3uoR0YTPffjvO92Fhi7/y9/tdBeiSa28J/IYMEXS0eXexlTgIeB24KLSZi5wc1leXT5T1t/m2mMXVwMXl9FbpwCTgLuBjcCkMtrrCGo331e38HgiImKQlp2J2L5L0irgHmA38ANgMfAPwApJnyi1JWWTJcCXJPVSOwO5uOznQUkrqQXQbuAy2y8ASLocWEtt5NdS2w+26ngiImJPLZ2A0fZCYOGg8lZqI6sGt/0lMGsv+1kELGpQXwOsOfCeRkREFfnFekREVJYQiYiIyhIiERFRWUIkIiIqS4hERERlCZGIiKgsIRIREZUlRCIiorKESEREVJYQiYiIyhIiERFRWUIkIiIqS4hERERlCZGIiKgsIRIREZUlRCIiorKESEREVNayEJH0Wkn31r1+LukDksZKWidpS3kfU9pL0rWSeiXdJ+nMun3NLe23SJpbVz9L0v1lm2vLs9wjIqJNWhYith+2Pdn2ZOAs4FngJmABsN72JGB9+QxwITCpvOYD1wFIGkvtEbvnUHus7sKB4Clt5tdtN71VxxMREXtq1+WsqcA/2/4XYAawrNSXATPL8gxguWs2AKMlnQRcAKyzvdP2LmAdML2sG2X7+7YNLK/bV0REtEG7QuRi4Ctl+UTbjwOU9xNKfRywrW6bvlLbV72vQX0PkuZL6pHU09/ff4CHEhERA1oeIpKOAN4JfG1/TRvUXKG+Z9FebLvbdndXV9d+uhEREc1qx5nIhcA9tp8on58ol6Io7ztKvQ+YULfdeGD7furjG9QjIqJN2hEis3nxUhbAamBghNVc4Oa6+pwySmsK8FS53LUWmCZpTLmhPg1YW9Y9LWlKGZU1p25fERHRBiNbuXNJRwO/Dby3rnw1sFLSPOAxYFaprwHeDvRSG8l1CYDtnZKuAjaWdlfa3lmWLwVuAI4CbimviIhok5aGiO1ngeMG1X5GbbTW4LYGLtvLfpYCSxvUe4DTD0pnIyJiyPKL9YiIqCwhEhERlSVEIiKisoRIRERUlhCJiIjKEiIREVFZQiQi
IipLiERERGUJkYiIqCwhEhERlSVEIiKisoRIRERUlhCJiIjKEiIREVFZQiQiIipLiERERGUJkYiIqKylISJptKRVkn4kabOkN0kaK2mdpC3lfUxpK0nXSuqVdJ+kM+v2M7e03yJpbl39LEn3l22uLc9aj4iINmn1mchngFttvw54I7AZWACstz0JWF8+A1wITCqv+cB1AJLGAguBc4CzgYUDwVPazK/bbnqLjyciIuq0LEQkjQLeAiwBsP287SeBGcCy0mwZMLMszwCWu2YDMFrSScAFwDrbO23vAtYB08u6Uba/X57PvrxuXxER0QatPBP5daAf+KKkH0i6XtIxwIm2Hwco7yeU9uOAbXXb95Xavup9DeoREdEmrQyRkcCZwHW2zwB+wYuXrhppdD/DFep77liaL6lHUk9/f/++ex0REU1rZYj0AX227yqfV1ELlSfKpSjK+4669hPqth8PbN9PfXyD+h5sL7bdbbu7q6vrgA4qIiJe1LIQsf0TYJuk15bSVOAhYDUwMMJqLnBzWV4NzCmjtKYAT5XLXWuBaZLGlBvq04C1Zd3TkqaUUVlz6vYVERFtMLLF+38fcKOkI4CtwCXUgmulpHnAY8Cs0nYN8HagF3i2tMX2TklXARtLuytt7yzLlwI3AEcBt5RXRES0SUtDxPa9QHeDVVMbtDVw2V72sxRY2qDeA5x+gN2MiIiK8ov1iIioLCESERGVJUQiIqKyhEhERFSWEImIiMoSIhERUVlCJCIiKkuIREREZQmRiIioLCESERGVJUQiIqKyhEhERFSWEImIiMoSIhERUVlCJCIiKkuIREREZQmRiIiorKUhIulRSfdLuldST6mNlbRO0pbyPqbUJelaSb2S7pN0Zt1+5pb2WyTNraufVfbfW7ZVK48nIiJ+VTvORH7L9mTbA4/JXQCstz0JWF8+A1wITCqv+cB1UAsdYCFwDnA2sHAgeEqb+XXbTW/94URExIBOXM6aASwry8uAmXX15a7ZAIyWdBJwAbDO9k7bu4B1wPSybpTt75fnsy+v21dERLRBq0PEwLclbZI0v9ROtP04QHk/odTHAdvqtu0rtX3V+xrUIyKiTUa2eP/n2t4u6QRgnaQf7aNto/sZrlDfc8e1AJsPcPLJJ++7xxER0bSWnonY3l7edwA3Ubun8US5FEV531Ga9wET6jYfD2zfT318g3qjfiy23W27u6ur60APKyIiipaFiKRjJB07sAxMAx4AVgMDI6zmAjeX5dXAnDJKawrwVLnctRaYJmlMuaE+DVhb1j0taUoZlTWnbl8REdEGrbycdSJwUxl1OxL4su1bJW0EVkqaBzwGzCrt1wBvB3qBZ4FLAGzvlHQVsLG0u9L2zrJ8KXADcBRwS3lFRESbtCxEbG8F3tig/jNgaoO6gcv2sq+lwNIG9R7g9APubEREVJJfrEdERGUJkYiIqCwhEhERlSVEIiKisoRIRERU1lSISFrfTC0iIg4v+xziK+lI4Gjg+PJDv4GpRkYBr25x3yIi4iVuf78TeS/wAWqBsYkXQ+TnwOdb2K+IiBgG9hkitj8DfEbS+2x/tk19ioiIYaKpX6zb/qykNwMT67exvbxF/YqIiGGgqRCR9CXgVOBe4IVSHngQVEREHKaanTurGzitzG8VEREBNP87kQeA/9DKjkRExPDT7JnI8cBDku4Gnhso2n5nS3oVERHDQrMh8uet7ERERAxPzY7O+k6rOxIREcNPs6OznqY2GgvgCODlwC9sj2pVxyIi4qWvqRvrto+1Paq8jgT+K/C5ZraVNELSDyR9q3w+RdJdkrZI+qqkI0r9FeVzb1k/sW4fV5T6w5IuqKtPL7VeSQuaP+yIiDgYKs3ia/ubwPlNNn8/sLnu8yeBa2xPAnYB80p9HrDL9muAa0o7JJ0GXAy8AZgOfKEE0whqU69cCJwGzC5tIyKiTZqdxff36l4XSbqaFy9v7Wu78cDvANeXz6IWPqtKk2XAzLI8o3ym
rJ9a2s8AVth+zvYjQC9wdnn12t5q+3lgRWkbERFt0uzorN+tW94NPEpz/2D/NfAR4Njy+TjgSdu7y+c+YFxZHgdsA7C9W9JTpf04YEPdPuu32Taofk4TfYqIiIOk2dFZlwx1x5LeAeywvUnSeQPlRrvfz7q91RudRTU8O5I0H5gPcPLJJ++j1xERMRTNXs4aL+kmSTskPSHp6+VS1b6cC7xT0qPULjWdT+3MZLSkgfAaD2wvy33AhPJ9I4FXATvr64O22Vt9D7YX2+623d3V1dXMIUdERBOavbH+RWA1teeKjAP+vtT2yvYVtsfbnkjtxvhttv8AuB24qDSbC9xclleXz5T1t5W5ulYDF5fRW6cAk4C7gY3ApDLa64jyHaubPJ6IiDgImg2RLttftL27vG4Aqv5J/6fAhyT1UrvnsaTUlwDHlfqHgAUAth8EVgIPAbcCl9l+odxXuRxYS23018rSNiIi2qTZG+s/lfQu4Cvl82zgZ81+ie07gDvK8lZqI6sGt/klMGsv2y8CFjWorwHWNNuPiIg4uJo9E/lD4PeBnwCPU7vcNOSb7RERcWhp9kzkKmCu7V0AksYCn6YWLhERcZhq9kzkNwYCBMD2TuCM1nQpIiKGi2ZD5GWSxgx8KGcizZ7FRETEIarZIPhL4P9KWkXtB32/T4Mb3RERcXhp9hfryyX1UPvBoIDfs/1QS3sWEREveU1fkiqhkeCIiIh/V2kq+IiICEiIRETEAUiIREREZQmRiIioLCESERGVJUQiIqKyhEhERFSWEImIiMoSIhERUVlCJCIiKmtZiEg6UtLdkn4o6UFJf1Hqp0i6S9IWSV8tz0enPEP9q5J6y/qJdfu6otQflnRBXX16qfVKWtCqY4mIiMZaeSbyHHC+7TcCk4HpkqYAnwSusT0J2AXMK+3nAbtsvwa4prRD0mnAxcAbgOnAFySNkDQC+DxwIXAaMLu0jYiINmlZiLjmmfLx5eVlajMBryr1ZcDMsjyjfKasnypJpb7C9nO2HwF6qT2j/Wyg1/ZW288DK0rbiIhok5beEylnDPcCO4B1wD8DT9reXZr0AePK8jhgG0BZ/xRwXH190DZ7q0dERJu0NERsv2B7MjCe2pnD6xs1K+/ay7qh1vcgab6kHkk9/f39++94REQ0pS2js2w/CdwBTAFGSxp4jsl4YHtZ7gMmAJT1rwJ21tcHbbO3eqPvX2y723Z3V1fXwTikiIigtaOzuiSNLstHAW8DNgO3AxeVZnOBm8vy6vKZsv422y71i8vorVOAScDdwEZgUhntdQS1m++rW3U8ERGxp6afbFjBScCyMorqZcBK29+S9BCwQtIngB8AS0r7JcCXJPVSOwO5GMD2g5JWUnuq4m7gMtsvAEi6HFgLjACW2n6whccTERGDtCxEbN8HnNGgvpXa/ZHB9V8Cs/ayr0XAogb1NcCaA+7sMPbYlf+p011o6OSP39/pLkREG+QX6xERUVlCJCIiKkuIREREZQmRiIioLCESERGVJUQiIqKyhEhERFSWEImIiMoSIhERUVlCJCIiKkuIREREZQmRiIioLCESERGVJUQiIqKyhEhERFSWEImIiMoSIhERUVlCJCIiKmtZiEiaIOl2SZslPSjp/aU+VtI6SVvK+5hSl6RrJfVKuk/SmXX7mlvab5E0t65+lqT7yzbXSlKrjiciIvbUsmesA7uBD9u+R9KxwCZJ64D3AOttXy1pAbAA+FPgQmBSeZ0DXAecI2kssBDoBlz2s9r2rtJmPrCB2rPWpwO3tPCYIuIlZNG7Lup0Fxr62N+t6nQX2qZlZyK2H7d9T1l+GtgMjANmAMtKs2XAzLI8A1jumg3AaEknARcA62zvLMGxDphe1o2y/X3bBpbX7SsiItqgLfdEJE0EzgDuAk60/TjUggY4oTQbB2yr26yv1PZV72tQb/T98yX1SOrp7+8/0MOJiIii5SEi6ZXA14EP2P75vpo2qLlCfc+ivdh2t+3urq6u
/XU5IiKa1NIQkfRyagFyo+1vlPIT5VIU5X1HqfcBE+o2Hw9s3099fIN6RES0SStHZwlYAmy2/Vd1q1YDAyOs5gI319XnlFFaU4CnyuWutcA0SWPKSK5pwNqy7mlJU8p3zanbV0REtEErR2edC7wbuF/SvaX2UeBqYKWkecBjwKyybg3wdqAXeBa4BMD2TklXARtLuytt7yzLlwI3AEdRG5WVkVkREW3UshCx/Y80vm8BMLVBewOX7WVfS4GlDeo9wOkH0M2IiDgA+cV6RERUlhCJiIjKEiIREVFZQiQiIipLiERERGUJkYiIqCwhEhERlSVEIiKisoRIRERUlhCJiIjKEiIREVFZQiQiIipLiERERGWtnAo+IiL2YvOi2zrdhb16/cfOb7ptzkQiIqKyhEhERFSWEImIiMpa+Yz1pZJ2SHqgrjZW0jpJW8r7mFKXpGsl9Uq6T9KZddvMLe23SJpbVz9L0v1lm2vLc9YjIqKNWnkmcgMwfVBtAbDe9iRgffkMcCEwqbzmA9dBLXSAhcA5wNnAwoHgKW3m1203+LsiIqLFWhYitr8L7BxUngEsK8vLgJl19eWu2QCMlnQScAGwzvZO27uAdcD0sm6U7e+XZ7Mvr9tXRES0SbvviZxo+3GA8n5CqY8DttW16yu1fdX7GtQbkjRfUo+knv7+/gM+iIiIqHmp3FhvdD/DFeoN2V5su9t2d1dXV8UuRkTEYO0OkSfKpSjK+45S7wMm1LUbD2zfT318g3pERLRRu0NkNTAwwmoucHNdfU4ZpTUFeKpc7loLTJM0ptxQnwasLeueljSljMqaU7eviIhok5ZNeyLpK8B5wPGS+qiNsroaWClpHvAYMKs0XwO8HegFngUuAbC9U9JVwMbS7krbAzfrL6U2Auwo4JbyioiINmpZiNievZdVUxu0NXDZXvazFFjaoN4DnH4gfYyIiAPzUrmxHhERw1BCJCIiKkuIREREZQmRiIioLCESERGVJUQiIqKyhEhERFSWEImIiMpa9mPDiMPBd97y1k53oaG3fvc7ne5CHCZyJhIREZUlRCIiorKESEREVJYQiYiIyhIiERFRWUIkIiIqS4hERERl+Z1IdNS5nz23011o6M733dnpLkQMC8P+TETSdEkPS+qVtKDT/YmIOJwM6xCRNAL4PHAhcBowW9Jpne1VRMThY1iHCHA20Gt7q+3ngRXAjA73KSLisCHbne5DZZIuAqbb/qPy+d3AObYvH9RuPjC/fHwt8HCLunQ88NMW7bsd0v/OSv87azj3v9V9/zXbXY1WDPcb62pQ2yMVbS8GFre8M1KP7e5Wf0+rpP+dlf531nDufyf7PtwvZ/UBE+o+jwe2d6gvERGHneEeIhuBSZJOkXQEcDGwusN9iog4bAzry1m2d0u6HFgLjACW2n6wg11q+SWzFkv/Oyv976zh3P+O9X1Y31iPiIjOGu6XsyIiooMSIhERUVlC5CAZztOvSFoqaYekBzrdlyokTZB0u6TNkh6U9P5O92koJB0p6W5JPyz9/4tO92moJI2Q9ANJ3+p0X4ZK0qOS7pd0r6SeTvdnqCSNlrRK0o/K/wfe1Nbvzz2RA1emX/kn4LepDTveCMy2/VBHO9YkSW8BngGW2z690/0ZKkknASfZvkfSscAmYOYw+s9fwDG2n5H0cuAfgffb3tDhrjVN0oeAbmCU7Xd0uj9DIelRoNv2sPyhoaRlwPdsX19GqR5t+8l2fX/ORA6OYT39iu3vAjs73Y+qbD9u+56y/DSwGRjX2V41zzXPlI8vL69h89edpPHA7wDXd7ovhxtJo4C3AEsAbD/fzgCBhMjBMg7YVve5j2H0j9ihRNJE4Azgrs72ZGjK5aB7gR3AOtvDqf9/DXwE+LdOd6QiA9+WtKlMkTSc/DrQD3yxXE68XtIx7exAQuTgaGr6lWgtSa8Evg58wPbPO92fobD9gu3J1GZdOFvSsLisKOkdwA7bmzrdlwNwru0zqc0Gflm5
vDtcjATOBK6zfQbwC6Ct92QTIgdHpl/psHIv4evAjba/0en+VFUuRdwBTO9wV5p1LvDOcl9hBXC+pL/rbJeGxvb28r4DuIna5enhog/oqztzXUUtVNomIXJwZPqVDio3ppcAm23/Vaf7M1SSuiSNLstHAW8DftTZXjXH9hW2x9ueSO1/97fZfleHu9U0SceUwRiUy0DTgGEzStH2T4Btkl5bSlOBtg4oGdbTnrxUvASnXxkSSV8BzgOOl9QHLLS9pLO9GpJzgXcD95f7CgAftb2mg30aipOAZWWU38uAlbaH3VDZYepE4Kba3yGMBL5s+9bOdmnI3gfcWP6A3Qpc0s4vzxDfiIioLJezIiKisoRIRERUlhCJiIjKEiIREVFZQiQiIipLiERERGUJkTjkSHpm/61a+v3vkfTqus/XSzrtIO7/zyX9uExd/pCk2Qdr33Xf8aik48s04//9YO8/Dh0JkYiD7z3Av4eI7T9qwbT015S5tmYA/6dM+9IKo4GESOxVQiQOWZLOk3RH3QN7bixTpCDp6vJX/H2SPl1qN0j6G0nfk/RPZXLBgRl2PyVpY2n/3rrv+Eh5oNEPyz4vovZcjRvLmcJRpQ/dpf3s0v4BSZ+s288zkhaV/WyQdGIzx2h7C/AsMKbs51RJt5YZab8n6XWlPqt85w8lfbfU3iPpc3V9+Jak8wZ9xdXAqeVYPiXpJEnfLZ8fkPSfh/RfShxyMu1JHOrOAN5AbULMO4FzJT0E/BfgdbY9MG9VMRF4K3AqcLuk1wBzgKds/6akVwB3Svo28DpgJnCO7WcljbW9s0yB8z9t9wCU3KJc4vokcBawi9r04zNtfxM4Bthg+2OS/jfwx8An9ndwks4EtpTJAwEWA39ie4ukc4AvAOcDHwcusP3jQce7PwuA08tZD5I+DKy1vahM03L0EPYVh6CESBzq7rbdB1Dm1ZoIbAB+CVwv6R+A+nmqVtr+N2CLpK3UgmIa8BvlLAPgVcAkahMlftH2swC29/dgr98E7rDdX/pzI7UHCn0TeL6uH5uoPSVzXz4o6Y+pPU9ietnfK4E3A18bCC7gFeX9TuAGSSuBA5nleCOwtFw++6bte/e3QRzacjkrDnXP1S2/AIy0vZvadN9fp3YmUT/h3uDJ5EzteTHvsz25vE6x/e1SH8rkc42eOzPgX/3iRHYvsP8/8K6x/VrgvwHLJR1J7f/PT9b1c7Lt1wPY/hPgz6g9suBeSccBu/nVfwOO3N8BlKdgvgX4MfAlSXP2t00c2hIicdgpf7G/qszy+wFgct3qWZJeJulUan/lP0xtduZLB25eS/qPZdrwbwN/KOnoUh9b9vE0cGyDr74LeGsZ9TQCmA1850COpTw7pQeYWx7E9YikWaU/kvTGsnyq7btsfxz4KbUweRSYXI53Ao2fo/ErxyLp16g9hOpvqU2/39ZnV8RLTy5nxeHoWODm8te7gA/WrXuY2j/sJ1K7t/BLSddTuwx2T7kx3w/MtH2rpMlAj6TngTXAR4EbgL+R9P+ANw3s2Pbjkq4Abi/fu8b2zQfheK4Evizpb4E/AK6T9GfUntW+Avgh8ClJk8r3ri81gEeA+6k9Q+OewTu2/TNJd0p6ALiltPtfkv4VeIba/aI4jGUq+IhC0g3At2yv6nRfIoaLXM6KiIjKciYS8RIl6WPArEHlr9le1In+RDSSEImIiMpyOSsiIipLiERERGUJkYiIqCwhEhERlf1/fKrbxhhKtn8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Class balance of the target column in train.\n",
    "# The column is passed via the `x`/`data` keywords: seaborn deprecated positional\n",
    "# data arguments in 0.11 and made them keyword-only in 0.12.\n",
    "ax = sns.countplot(x=\"Inspection_Results\", data=train)\n",
    "# The trailing ';' keeps the Text repr of the title out of the cell output.\n",
    "ax.set_title(\"Inspection_Results class counts (train)\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CANVASS                                   77943\n",
       "LICENSE                                   19645\n",
       "CANVASS RE-INSPECTION                     15705\n",
       "COMPLAINT                                 13572\n",
       "LICENSE RE-INSPECTION                      6796\n",
       "COMPLAINT RE-INSPECTION                    5655\n",
       "SHORT FORM COMPLAINT                       5094\n",
       "SUSPECTED FOOD POISONING                    631\n",
       "CONSULTATION                                503\n",
       "TAG REMOVAL                                 456\n",
       "LICENSE-TASK FORCE                          444\n",
       "RECENT INSPECTION                           248\n",
       "OUT OF BUSINESS                             220\n",
       "TASK FORCE LIQUOR 1475                      187\n",
       "SUSPECTED FOOD POISONING RE-INSPECTION      147\n",
       "COMPLAINT-FIRE                              120\n",
       "SHORT FORM FIRE-COMPLAINT                    77\n",
       "Name: Reason, dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How often each inspection Reason occurs in train.\n",
    "train['Reason'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    108375.000000\n",
       "mean         24.129153\n",
       "std          12.657456\n",
       "min           1.000000\n",
       "25%          14.000000\n",
       "50%          31.000000\n",
       "75%          33.000000\n",
       "max          70.000000\n",
       "Name: SectionViolations, dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of the SectionViolations column in train.\n",
    "train.SectionViolations.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Type', 'LocationID', 'SectionViolations']\n"
     ]
    }
   ],
   "source": [
    "# Names of the train columns that contain at least one missing value.\n",
    "missing_cols = train.columns[train.isnull().any()].tolist()\n",
    "print(missing_cols)\n",
    "# Record where values were missing as boolean '<col>_was_missing' columns.\n",
    "# The column list is derived from train and applied unchanged to test.\n",
    "for col in missing_cols:\n",
    "    for frame in (train, test):\n",
    "        frame[col + '_was_missing'] = frame[col].isnull()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "32.0 32.0\n"
     ]
    }
   ],
   "source": [
    "# Most frequent SectionViolations value in each frame: value_counts() sorts by\n",
    "# descending frequency, so the first index entry is the mode.\n",
    "# NOTE(review): these values are only inspected here; the imputation cell visible\n",
    "# in this notebook fills SectionViolations with 0 -- confirm that is intended.\n",
    "sv_mode_train = train['SectionViolations'].value_counts().index[0]\n",
    "sv_mode_test = test['SectionViolations'].value_counts().index[0]\n",
    "\n",
    "# Label each value so train and test cannot be confused when reading the output.\n",
    "print(\"train mode:\", sv_mode_train, \"| test mode:\", sv_mode_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace missing values with fixed placeholders, column by column, in both frames.\n",
    "# Run this only after the '*_was_missing' flags have been created; once the NaNs\n",
    "# are filled, isnull() can no longer tell which rows were originally missing.\n",
    "placeholders = [\n",
    "    ('SectionViolations', 0),\n",
    "    ('Type', \"BLANK\"),\n",
    "    ('LocationID', 0.0),\n",
    "]\n",
    "for frame in (train, test):\n",
    "    for column, placeholder in placeholders:\n",
    "        frame[column] = frame[column].fillna(placeholder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pool rare Type values: any Type seen between 1 and 29 times in train\n",
    "# (more than 0 and fewer than 30) is relabelled in a new Type_New column.\n",
    "type_counts = train.Type.value_counts()\n",
    "other_list = type_counts[type_counts.between(1, 29)].index\n",
    "\n",
    "# The rare-type list comes from train only and is applied unchanged to test.\n",
    "train_others = ['other ' if kind in other_list else kind for kind in train.Type]\n",
    "test_others = ['other ' if kind in other_list else kind for kind in test.Type]\n",
    "\n",
    "train['Type_New'] = train_others\n",
    "test['Type_New'] = test_others"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model inputs. Keep the column order stable: the categorical columns are later\n",
    "# identified by their position in X.\n",
    "# NOTE(review): the raw 'Type' column is used rather than the pooled 'Type_New',\n",
    "# and 'Day'/'Month' are not included -- confirm this is intended.\n",
    "features = [\n",
    "    'LicenseNo',\n",
    "    'SectionViolations',\n",
    "    'Geo_Loc',\n",
    "    'LocationID_was_missing',\n",
    "    'Type_was_missing',\n",
    "    'Date',\n",
    "    'Reason',\n",
    "    'Year',\n",
    "    'Type',\n",
    "    'SectionViolations_was_missing',\n",
    "]\n",
    "X = train[features]\n",
    "# Target: the Inspection_Results column.\n",
    "y = train['Inspection_Results']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out a validation set from the training data.\n",
    "# A fixed random_state makes the shuffle, and therefore every downstream score,\n",
    "# reproducible across kernel restarts; without it each run gets a different split.\n",
    "train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Positional indices of the object-dtype columns of X (the categorical features).\n",
    "categorical_features = np.flatnonzero((X.dtypes == 'object').values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# CatBoost classifier: 250 boosting iterations, trees of depth 8, learning rate 0.06.\n",
    "catboost_params = {\n",
    "    'iterations': 250,\n",
    "    'depth': 8,\n",
    "    'learning_rate': 0.06,\n",
    "}\n",
    "model_catboost = CatBoostClassifier(**catboost_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0:\tlearn: 1.7008333\ttotal: 2.1s\tremaining: 8m 42s\n",
      "1:\tlearn: 1.5309940\ttotal: 4.09s\tremaining: 8m 27s\n",
      "2:\tlearn: 1.3992025\ttotal: 6.15s\tremaining: 8m 26s\n",
      "3:\tlearn: 1.2932687\ttotal: 7.98s\tremaining: 8m 10s\n",
      "4:\tlearn: 1.2008666\ttotal: 10.2s\tremaining: 8m 17s\n",
      "5:\tlearn: 1.1234931\ttotal: 12.2s\tremaining: 8m 15s\n",
      "6:\tlearn: 1.0579163\ttotal: 14.3s\tremaining: 8m 17s\n",
      "7:\tlearn: 1.0011230\ttotal: 16.3s\tremaining: 8m 12s\n",
      "8:\tlearn: 0.9503972\ttotal: 18.4s\tremaining: 8m 13s\n",
      "9:\tlearn: 0.9062562\ttotal: 20.8s\tremaining: 8m 18s\n",
      "10:\tlearn: 0.8656853\ttotal: 22.9s\tremaining: 8m 17s\n",
      "11:\tlearn: 0.8295685\ttotal: 25.1s\tremaining: 8m 18s\n",
      "12:\tlearn: 0.7967613\ttotal: 27.4s\tremaining: 8m 19s\n",
      "13:\tlearn: 0.7673257\ttotal: 29.4s\tremaining: 8m 15s\n",
      "14:\tlearn: 0.7404659\ttotal: 31.6s\tremaining: 8m 15s\n",
      "15:\tlearn: 0.7155311\ttotal: 34.1s\tremaining: 8m 18s\n",
      "16:\tlearn: 0.6922760\ttotal: 36.5s\tremaining: 8m 19s\n",
      "17:\tlearn: 0.6713098\ttotal: 38.4s\tremaining: 8m 15s\n",
      "18:\tlearn: 0.6516911\ttotal: 40.7s\tremaining: 8m 14s\n",
      "19:\tlearn: 0.6336769\ttotal: 43.1s\tremaining: 8m 15s\n",
      "20:\tlearn: 0.6169551\ttotal: 45.2s\tremaining: 8m 12s\n",
      "21:\tlearn: 0.6016236\ttotal: 48.2s\tremaining: 8m 19s\n",
      "22:\tlearn: 0.5870285\ttotal: 50.4s\tremaining: 8m 17s\n",
      "23:\tlearn: 0.5740014\ttotal: 52.7s\tremaining: 8m 16s\n",
      "24:\tlearn: 0.5610016\ttotal: 54.7s\tremaining: 8m 12s\n",
      "25:\tlearn: 0.5489443\ttotal: 56.7s\tremaining: 8m 8s\n",
      "26:\tlearn: 0.5378770\ttotal: 58.8s\tremaining: 8m 5s\n",
      "27:\tlearn: 0.5279475\ttotal: 1m 1s\tremaining: 8m 5s\n",
      "28:\tlearn: 0.5185392\ttotal: 1m 3s\tremaining: 8m 3s\n",
      "29:\tlearn: 0.5097341\ttotal: 1m 5s\tremaining: 8m 2s\n",
      "30:\tlearn: 0.5012743\ttotal: 1m 8s\tremaining: 8m 1s\n",
      "31:\tlearn: 0.4939125\ttotal: 1m 10s\tremaining: 8m 3s\n",
      "32:\tlearn: 0.4866976\ttotal: 1m 13s\tremaining: 8m\n",
      "33:\tlearn: 0.4801787\ttotal: 1m 15s\tremaining: 8m 1s\n",
      "34:\tlearn: 0.4740341\ttotal: 1m 18s\tremaining: 8m 1s\n",
      "35:\tlearn: 0.4681279\ttotal: 1m 20s\tremaining: 7m 58s\n",
      "36:\tlearn: 0.4626056\ttotal: 1m 22s\tremaining: 7m 56s\n",
      "37:\tlearn: 0.4570881\ttotal: 1m 25s\tremaining: 7m 57s\n",
      "38:\tlearn: 0.4523629\ttotal: 1m 28s\tremaining: 7m 57s\n",
      "39:\tlearn: 0.4475916\ttotal: 1m 31s\tremaining: 7m 59s\n",
      "40:\tlearn: 0.4437161\ttotal: 1m 34s\tremaining: 8m 1s\n",
      "41:\tlearn: 0.4394026\ttotal: 1m 37s\tremaining: 8m\n",
      "42:\tlearn: 0.4355197\ttotal: 1m 40s\tremaining: 8m 4s\n",
      "43:\tlearn: 0.4319108\ttotal: 1m 44s\tremaining: 8m 9s\n",
      "44:\tlearn: 0.4280363\ttotal: 1m 47s\tremaining: 8m 10s\n",
      "45:\tlearn: 0.4250312\ttotal: 1m 50s\tremaining: 8m 11s\n",
      "46:\tlearn: 0.4226056\ttotal: 1m 54s\tremaining: 8m 12s\n",
      "47:\tlearn: 0.4194087\ttotal: 1m 57s\tremaining: 8m 14s\n",
      "48:\tlearn: 0.4163589\ttotal: 2m\tremaining: 8m 14s\n",
      "49:\tlearn: 0.4133515\ttotal: 2m 3s\tremaining: 8m 13s\n",
      "50:\tlearn: 0.4105571\ttotal: 2m 6s\tremaining: 8m 14s\n",
      "51:\tlearn: 0.4082760\ttotal: 2m 9s\tremaining: 8m 12s\n",
      "52:\tlearn: 0.4061921\ttotal: 2m 12s\tremaining: 8m 11s\n",
      "53:\tlearn: 0.4041795\ttotal: 2m 15s\tremaining: 8m 10s\n",
      "54:\tlearn: 0.4012026\ttotal: 2m 18s\tremaining: 8m 12s\n",
      "55:\tlearn: 0.3984821\ttotal: 2m 21s\tremaining: 8m 11s\n",
      "56:\tlearn: 0.3967903\ttotal: 2m 24s\tremaining: 8m 8s\n",
      "57:\tlearn: 0.3942290\ttotal: 2m 27s\tremaining: 8m 8s\n",
      "58:\tlearn: 0.3923187\ttotal: 2m 31s\tremaining: 8m 10s\n",
      "59:\tlearn: 0.3904324\ttotal: 2m 34s\tremaining: 8m 10s\n",
      "60:\tlearn: 0.3888795\ttotal: 2m 37s\tremaining: 8m 7s\n",
      "61:\tlearn: 0.3867042\ttotal: 2m 39s\tremaining: 8m 4s\n",
      "62:\tlearn: 0.3842211\ttotal: 2m 42s\tremaining: 8m 2s\n",
      "63:\tlearn: 0.3821540\ttotal: 2m 44s\tremaining: 7m 58s\n",
      "64:\tlearn: 0.3797521\ttotal: 2m 47s\tremaining: 7m 57s\n",
      "65:\tlearn: 0.3775575\ttotal: 2m 50s\tremaining: 7m 56s\n",
      "66:\tlearn: 0.3756312\ttotal: 2m 55s\tremaining: 7m 59s\n",
      "67:\tlearn: 0.3735951\ttotal: 2m 58s\tremaining: 7m 58s\n",
      "68:\tlearn: 0.3721122\ttotal: 3m 1s\tremaining: 7m 57s\n",
      "69:\tlearn: 0.3702654\ttotal: 3m 5s\tremaining: 7m 55s\n",
      "70:\tlearn: 0.3686418\ttotal: 3m 7s\tremaining: 7m 53s\n",
      "71:\tlearn: 0.3671784\ttotal: 3m 10s\tremaining: 7m 51s\n",
      "72:\tlearn: 0.3658127\ttotal: 3m 13s\tremaining: 7m 48s\n",
      "73:\tlearn: 0.3649173\ttotal: 3m 16s\tremaining: 7m 47s\n",
      "74:\tlearn: 0.3647584\ttotal: 3m 16s\tremaining: 7m 39s\n",
      "75:\tlearn: 0.3633244\ttotal: 3m 19s\tremaining: 7m 36s\n",
      "76:\tlearn: 0.3620231\ttotal: 3m 22s\tremaining: 7m 33s\n",
      "77:\tlearn: 0.3609154\ttotal: 3m 25s\tremaining: 7m 33s\n",
      "78:\tlearn: 0.3599059\ttotal: 3m 29s\tremaining: 7m 34s\n",
      "79:\tlearn: 0.3590721\ttotal: 3m 35s\tremaining: 7m 38s\n",
      "80:\tlearn: 0.3583636\ttotal: 3m 40s\tremaining: 7m 39s\n",
      "81:\tlearn: 0.3573100\ttotal: 3m 43s\tremaining: 7m 37s\n",
      "82:\tlearn: 0.3563027\ttotal: 3m 46s\tremaining: 7m 35s\n",
      "83:\tlearn: 0.3555898\ttotal: 3m 51s\tremaining: 7m 36s\n",
      "84:\tlearn: 0.3554512\ttotal: 3m 52s\tremaining: 7m 31s\n",
      "85:\tlearn: 0.3547324\ttotal: 3m 55s\tremaining: 7m 28s\n",
      "86:\tlearn: 0.3540958\ttotal: 3m 58s\tremaining: 7m 27s\n",
      "87:\tlearn: 0.3532554\ttotal: 4m 2s\tremaining: 7m 26s\n",
      "88:\tlearn: 0.3527360\ttotal: 4m 5s\tremaining: 7m 24s\n",
      "89:\tlearn: 0.3523237\ttotal: 4m 8s\tremaining: 7m 22s\n",
      "90:\tlearn: 0.3515629\ttotal: 4m 11s\tremaining: 7m 19s\n",
      "91:\tlearn: 0.3508599\ttotal: 4m 13s\tremaining: 7m 15s\n",
      "92:\tlearn: 0.3502515\ttotal: 4m 16s\tremaining: 7m 13s\n",
      "93:\tlearn: 0.3499094\ttotal: 4m 19s\tremaining: 7m 11s\n",
      "94:\tlearn: 0.3495400\ttotal: 4m 23s\tremaining: 7m 9s\n",
      "95:\tlearn: 0.3488345\ttotal: 4m 25s\tremaining: 7m 6s\n",
      "96:\tlearn: 0.3482519\ttotal: 4m 28s\tremaining: 7m 4s\n",
      "97:\tlearn: 0.3476020\ttotal: 4m 31s\tremaining: 7m 1s\n",
      "98:\tlearn: 0.3471516\ttotal: 4m 34s\tremaining: 6m 58s\n",
      "99:\tlearn: 0.3464685\ttotal: 4m 37s\tremaining: 6m 55s\n",
      "100:\tlearn: 0.3460983\ttotal: 4m 39s\tremaining: 6m 52s\n",
      "101:\tlearn: 0.3456292\ttotal: 4m 42s\tremaining: 6m 50s\n",
      "102:\tlearn: 0.3451665\ttotal: 4m 45s\tremaining: 6m 47s\n",
      "103:\tlearn: 0.3447421\ttotal: 4m 48s\tremaining: 6m 45s\n",
      "104:\tlearn: 0.3443215\ttotal: 4m 51s\tremaining: 6m 43s\n",
      "105:\tlearn: 0.3439162\ttotal: 4m 54s\tremaining: 6m 40s\n",
      "106:\tlearn: 0.3438671\ttotal: 4m 55s\tremaining: 6m 34s\n",
      "107:\tlearn: 0.3434402\ttotal: 4m 57s\tremaining: 6m 31s\n",
      "108:\tlearn: 0.3431287\ttotal: 5m\tremaining: 6m 28s\n",
      "109:\tlearn: 0.3431066\ttotal: 5m 1s\tremaining: 6m 23s\n",
      "110:\tlearn: 0.3429098\ttotal: 5m 4s\tremaining: 6m 21s\n",
      "111:\tlearn: 0.3422233\ttotal: 5m 6s\tremaining: 6m 17s\n",
      "112:\tlearn: 0.3418431\ttotal: 5m 9s\tremaining: 6m 15s\n",
      "113:\tlearn: 0.3413777\ttotal: 5m 12s\tremaining: 6m 12s\n",
      "114:\tlearn: 0.3409653\ttotal: 5m 14s\tremaining: 6m 9s\n",
      "115:\tlearn: 0.3406580\ttotal: 5m 17s\tremaining: 6m 6s\n",
      "116:\tlearn: 0.3403693\ttotal: 5m 19s\tremaining: 6m 3s\n",
      "117:\tlearn: 0.3399957\ttotal: 5m 22s\tremaining: 6m\n",
      "118:\tlearn: 0.3396686\ttotal: 5m 24s\tremaining: 5m 57s\n",
      "119:\tlearn: 0.3395713\ttotal: 5m 27s\tremaining: 5m 54s\n",
      "120:\tlearn: 0.3393857\ttotal: 5m 30s\tremaining: 5m 52s\n",
      "121:\tlearn: 0.3392877\ttotal: 5m 33s\tremaining: 5m 50s\n",
      "122:\tlearn: 0.3389335\ttotal: 5m 36s\tremaining: 5m 47s\n",
      "123:\tlearn: 0.3386922\ttotal: 5m 38s\tremaining: 5m 43s\n",
      "124:\tlearn: 0.3383932\ttotal: 5m 40s\tremaining: 5m 40s\n",
      "125:\tlearn: 0.3381452\ttotal: 5m 43s\tremaining: 5m 37s\n",
      "126:\tlearn: 0.3379830\ttotal: 5m 46s\tremaining: 5m 35s\n",
      "127:\tlearn: 0.3376656\ttotal: 5m 48s\tremaining: 5m 32s\n",
      "128:\tlearn: 0.3373247\ttotal: 5m 52s\tremaining: 5m 30s\n",
      "129:\tlearn: 0.3369122\ttotal: 5m 55s\tremaining: 5m 27s\n",
      "130:\tlearn: 0.3366815\ttotal: 5m 57s\tremaining: 5m 25s\n",
      "131:\tlearn: 0.3362785\ttotal: 6m\tremaining: 5m 22s\n",
      "132:\tlearn: 0.3360505\ttotal: 6m 2s\tremaining: 5m 18s\n",
      "133:\tlearn: 0.3358104\ttotal: 6m 5s\tremaining: 5m 16s\n",
      "134:\tlearn: 0.3355033\ttotal: 6m 7s\tremaining: 5m 13s\n",
      "135:\tlearn: 0.3353255\ttotal: 6m 10s\tremaining: 5m 10s\n",
      "136:\tlearn: 0.3351731\ttotal: 6m 13s\tremaining: 5m 8s\n",
      "137:\tlearn: 0.3349847\ttotal: 6m 16s\tremaining: 5m 5s\n",
      "138:\tlearn: 0.3349330\ttotal: 6m 19s\tremaining: 5m 3s\n",
      "139:\tlearn: 0.3346925\ttotal: 6m 22s\tremaining: 5m\n",
      "140:\tlearn: 0.3344579\ttotal: 6m 25s\tremaining: 4m 58s\n",
      "141:\tlearn: 0.3341714\ttotal: 6m 28s\tremaining: 4m 55s\n",
      "142:\tlearn: 0.3339911\ttotal: 6m 30s\tremaining: 4m 52s\n",
      "143:\tlearn: 0.3337300\ttotal: 6m 33s\tremaining: 4m 49s\n",
      "144:\tlearn: 0.3335518\ttotal: 6m 35s\tremaining: 4m 46s\n",
      "145:\tlearn: 0.3333358\ttotal: 6m 38s\tremaining: 4m 43s\n",
      "146:\tlearn: 0.3331009\ttotal: 6m 41s\tremaining: 4m 41s\n",
      "147:\tlearn: 0.3329274\ttotal: 6m 44s\tremaining: 4m 39s\n",
      "148:\tlearn: 0.3326175\ttotal: 6m 48s\tremaining: 4m 36s\n",
      "149:\tlearn: 0.3324708\ttotal: 6m 51s\tremaining: 4m 34s\n",
      "150:\tlearn: 0.3319960\ttotal: 6m 55s\tremaining: 4m 32s\n",
      "151:\tlearn: 0.3318534\ttotal: 6m 58s\tremaining: 4m 29s\n",
      "152:\tlearn: 0.3317104\ttotal: 7m 1s\tremaining: 4m 27s\n",
      "153:\tlearn: 0.3315825\ttotal: 7m 4s\tremaining: 4m 24s\n",
      "154:\tlearn: 0.3312689\ttotal: 7m 7s\tremaining: 4m 21s\n",
      "155:\tlearn: 0.3310903\ttotal: 7m 9s\tremaining: 4m 18s\n",
      "156:\tlearn: 0.3308653\ttotal: 7m 12s\tremaining: 4m 15s\n",
      "157:\tlearn: 0.3305395\ttotal: 7m 14s\tremaining: 4m 12s\n",
      "158:\tlearn: 0.3302220\ttotal: 7m 16s\tremaining: 4m 9s\n",
      "159:\tlearn: 0.3298416\ttotal: 7m 19s\tremaining: 4m 7s\n",
      "160:\tlearn: 0.3297786\ttotal: 7m 22s\tremaining: 4m 4s\n",
      "161:\tlearn: 0.3296047\ttotal: 7m 25s\tremaining: 4m 1s\n",
      "162:\tlearn: 0.3289998\ttotal: 7m 27s\tremaining: 3m 58s\n",
      "163:\tlearn: 0.3286356\ttotal: 7m 30s\tremaining: 3m 56s\n",
      "164:\tlearn: 0.3281520\ttotal: 7m 33s\tremaining: 3m 53s\n",
      "165:\tlearn: 0.3279601\ttotal: 7m 36s\tremaining: 3m 50s\n",
      "166:\tlearn: 0.3279451\ttotal: 7m 37s\tremaining: 3m 47s\n",
      "167:\tlearn: 0.3278893\ttotal: 7m 40s\tremaining: 3m 44s\n",
      "168:\tlearn: 0.3278299\ttotal: 7m 43s\tremaining: 3m 42s\n",
      "169:\tlearn: 0.3274269\ttotal: 7m 46s\tremaining: 3m 39s\n",
      "170:\tlearn: 0.3273759\ttotal: 7m 49s\tremaining: 3m 36s\n",
      "171:\tlearn: 0.3268861\ttotal: 7m 51s\tremaining: 3m 33s\n",
      "172:\tlearn: 0.3263448\ttotal: 7m 54s\tremaining: 3m 31s\n",
      "173:\tlearn: 0.3261303\ttotal: 7m 56s\tremaining: 3m 28s\n",
      "174:\tlearn: 0.3257859\ttotal: 7m 59s\tremaining: 3m 25s\n",
      "175:\tlearn: 0.3254489\ttotal: 8m 1s\tremaining: 3m 22s\n",
      "176:\tlearn: 0.3252528\ttotal: 8m 4s\tremaining: 3m 19s\n",
      "177:\tlearn: 0.3251908\ttotal: 8m 7s\tremaining: 3m 17s\n",
      "178:\tlearn: 0.3249218\ttotal: 8m 10s\tremaining: 3m 14s\n",
      "179:\tlearn: 0.3246218\ttotal: 8m 13s\tremaining: 3m 11s\n",
      "180:\tlearn: 0.3242930\ttotal: 8m 15s\tremaining: 3m 9s\n",
      "181:\tlearn: 0.3241371\ttotal: 8m 18s\tremaining: 3m 6s\n",
      "182:\tlearn: 0.3240590\ttotal: 8m 21s\tremaining: 3m 3s\n",
      "183:\tlearn: 0.3238283\ttotal: 8m 24s\tremaining: 3m\n",
      "184:\tlearn: 0.3235787\ttotal: 8m 27s\tremaining: 2m 58s\n",
      "185:\tlearn: 0.3234383\ttotal: 8m 30s\tremaining: 2m 55s\n",
      "186:\tlearn: 0.3230473\ttotal: 8m 33s\tremaining: 2m 52s\n",
      "187:\tlearn: 0.3227026\ttotal: 8m 36s\tremaining: 2m 50s\n",
      "188:\tlearn: 0.3226362\ttotal: 8m 38s\tremaining: 2m 47s\n",
      "189:\tlearn: 0.3223907\ttotal: 8m 41s\tremaining: 2m 44s\n",
      "190:\tlearn: 0.3221182\ttotal: 8m 44s\tremaining: 2m 41s\n",
      "191:\tlearn: 0.3220454\ttotal: 8m 47s\tremaining: 2m 39s\n",
      "192:\tlearn: 0.3216063\ttotal: 8m 50s\tremaining: 2m 36s\n",
      "193:\tlearn: 0.3214788\ttotal: 8m 53s\tremaining: 2m 33s\n",
      "194:\tlearn: 0.3213458\ttotal: 8m 56s\tremaining: 2m 31s\n",
      "195:\tlearn: 0.3212542\ttotal: 8m 59s\tremaining: 2m 28s\n",
      "196:\tlearn: 0.3211936\ttotal: 9m 1s\tremaining: 2m 25s\n",
      "197:\tlearn: 0.3210625\ttotal: 9m 4s\tremaining: 2m 22s\n",
      "198:\tlearn: 0.3209872\ttotal: 9m 6s\tremaining: 2m 20s\n",
      "199:\tlearn: 0.3208209\ttotal: 9m 9s\tremaining: 2m 17s\n",
      "200:\tlearn: 0.3207301\ttotal: 9m 11s\tremaining: 2m 14s\n",
      "201:\tlearn: 0.3202254\ttotal: 9m 14s\tremaining: 2m 11s\n",
      "202:\tlearn: 0.3200746\ttotal: 9m 16s\tremaining: 2m 8s\n",
      "203:\tlearn: 0.3200021\ttotal: 9m 19s\tremaining: 2m 6s\n",
      "204:\tlearn: 0.3198967\ttotal: 9m 22s\tremaining: 2m 3s\n",
      "205:\tlearn: 0.3198353\ttotal: 9m 24s\tremaining: 2m\n",
      "206:\tlearn: 0.3197149\ttotal: 9m 27s\tremaining: 1m 57s\n",
      "207:\tlearn: 0.3195109\ttotal: 9m 30s\tremaining: 1m 55s\n",
      "208:\tlearn: 0.3194338\ttotal: 9m 32s\tremaining: 1m 52s\n",
      "209:\tlearn: 0.3193479\ttotal: 9m 36s\tremaining: 1m 49s\n",
      "210:\tlearn: 0.3190987\ttotal: 9m 38s\tremaining: 1m 46s\n",
      "211:\tlearn: 0.3190323\ttotal: 9m 41s\tremaining: 1m 44s\n",
      "212:\tlearn: 0.3189166\ttotal: 9m 44s\tremaining: 1m 41s\n",
      "213:\tlearn: 0.3188209\ttotal: 9m 46s\tremaining: 1m 38s\n",
      "214:\tlearn: 0.3185872\ttotal: 9m 49s\tremaining: 1m 35s\n",
      "215:\tlearn: 0.3183898\ttotal: 9m 53s\tremaining: 1m 33s\n",
      "216:\tlearn: 0.3182674\ttotal: 9m 56s\tremaining: 1m 30s\n",
      "217:\tlearn: 0.3182439\ttotal: 9m 58s\tremaining: 1m 27s\n",
      "218:\tlearn: 0.3182253\ttotal: 10m 1s\tremaining: 1m 25s\n",
      "219:\tlearn: 0.3180407\ttotal: 10m 4s\tremaining: 1m 22s\n",
      "220:\tlearn: 0.3179873\ttotal: 10m 7s\tremaining: 1m 19s\n",
      "221:\tlearn: 0.3178418\ttotal: 10m 10s\tremaining: 1m 16s\n",
      "222:\tlearn: 0.3176655\ttotal: 10m 13s\tremaining: 1m 14s\n",
      "223:\tlearn: 0.3175567\ttotal: 10m 15s\tremaining: 1m 11s\n",
      "224:\tlearn: 0.3174316\ttotal: 10m 18s\tremaining: 1m 8s\n",
      "225:\tlearn: 0.3171817\ttotal: 10m 21s\tremaining: 1m 6s\n",
      "226:\tlearn: 0.3171497\ttotal: 10m 24s\tremaining: 1m 3s\n",
      "227:\tlearn: 0.3168817\ttotal: 10m 26s\tremaining: 1m\n",
      "228:\tlearn: 0.3166097\ttotal: 10m 29s\tremaining: 57.8s\n",
      "229:\tlearn: 0.3163669\ttotal: 10m 32s\tremaining: 55s\n",
      "230:\tlearn: 0.3162454\ttotal: 10m 35s\tremaining: 52.3s\n",
      "231:\tlearn: 0.3159676\ttotal: 10m 38s\tremaining: 49.5s\n",
      "232:\tlearn: 0.3157704\ttotal: 10m 40s\tremaining: 46.7s\n",
      "233:\tlearn: 0.3157405\ttotal: 10m 42s\tremaining: 44s\n",
      "234:\tlearn: 0.3156173\ttotal: 10m 45s\tremaining: 41.2s\n",
      "235:\tlearn: 0.3153140\ttotal: 10m 48s\tremaining: 38.5s\n",
      "236:\tlearn: 0.3150765\ttotal: 10m 50s\tremaining: 35.7s\n",
      "237:\tlearn: 0.3149520\ttotal: 10m 52s\tremaining: 32.9s\n",
      "238:\tlearn: 0.3148868\ttotal: 10m 55s\tremaining: 30.2s\n",
      "239:\tlearn: 0.3147694\ttotal: 10m 58s\tremaining: 27.4s\n",
      "240:\tlearn: 0.3146850\ttotal: 11m\tremaining: 24.7s\n",
      "241:\tlearn: 0.3145859\ttotal: 11m 3s\tremaining: 21.9s\n",
      "242:\tlearn: 0.3145482\ttotal: 11m 6s\tremaining: 19.2s\n",
      "243:\tlearn: 0.3143985\ttotal: 11m 9s\tremaining: 16.5s\n",
      "244:\tlearn: 0.3142133\ttotal: 11m 12s\tremaining: 13.7s\n",
      "245:\tlearn: 0.3140269\ttotal: 11m 15s\tremaining: 11s\n",
      "246:\tlearn: 0.3139007\ttotal: 11m 18s\tremaining: 8.24s\n",
      "247:\tlearn: 0.3138306\ttotal: 11m 22s\tremaining: 5.5s\n",
      "248:\tlearn: 0.3137289\ttotal: 11m 25s\tremaining: 2.75s\n",
      "249:\tlearn: 0.3135802\ttotal: 11m 27s\tremaining: 0us\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<catboost.core.CatBoostClassifier at 0x2642c12c988>"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the CatBoostClassifier on X and y; the entries of categorical_features\n",
    "# are forwarded to CatBoost as cat_features.\n",
    "model_catboost.fit(\n",
    "    X,\n",
    "    y,\n",
    "    cat_features=categorical_features,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-class probabilities for the test rows, using the columns listed in features.\n",
    "predicted_y = model_catboost.predict_proba(\n",
    "    test[features]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   FACILITY CHANGED      FAIL  FURTHER INSPECTION REQUIRED  \\\n",
      "0          0.000117  0.964726                     0.000447   \n",
      "1          0.000099  0.011888                     0.000214   \n",
      "2          0.000339  0.005653                     0.001760   \n",
      "3          0.002606  0.050248                     0.001082   \n",
      "4          0.000021  0.003390                     0.000086   \n",
      "\n",
      "   INSPECTION OVERRULED      PASS  PASS(CONDITIONAL)  SHUT-DOWN  \n",
      "0              0.000767  0.001673           0.031699   0.000571  \n",
      "1              0.000912  0.983284           0.003182   0.000421  \n",
      "2              0.854253  0.022696           0.000647   0.114653  \n",
      "3              0.030395  0.557305           0.003447   0.354918  \n",
      "4              0.000549  0.991894           0.003853   0.000208  \n"
     ]
    }
   ],
   "source": [
    "# Label the probability matrix with the sample-submission column names.\n",
    "# NOTE(review): assumes the column order returned by predict_proba matches the\n",
    "# order of submission.columns - confirm against model_catboost.classes_.\n",
    "df = pd.DataFrame(predicted_y, columns=submission.columns.tolist())\n",
    "print(df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the probability table to submission.xlsx without the row index.\n",
    "# index is a boolean flag, so pass False explicitly rather than relying on None being falsy.\n",
    "df.to_excel('submission.xlsx', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
